*** Clean Raw Dataset

* Treatments: 	1= NEUTRAL (points= +3 for all)			4 sessions order 2-2-12-12 + 4 sessions reverse order
*				2= CONVERGE  (points=  + 5,  +3, + 1)		4 sessions order 2-2-12-12  + 4 sessions reverse order
*				3= DIVERGE (points= +1, +3, + 5)			4 sessions order 2-2-12-12 + 4 sessions reverse order
*				4= NEUTRAL+ (points= + 5 for all)			4 sessions order 2-2-12-12 + 4 sessions reverse order
*				5= NEUTRAL-CHAT (points= + 3 for all)	4 sessions order 2-2-12-12

clear all
capture log close
cd C:\temp\Data&Methods\DataCleaning
log using "Data_Cleaning`c(current_date)'.log", replace

* Import SUBJECTS + QUIZ data from .xlsx files Session#Treatment# and add treatment variable (k=treatment; i=session)
	forvalues k=1/4{
		forvalues i=1/8{
			import excel using Session`i'T`k'.xlsx, clear
			gen Treatment =`k'
			gen Session=`i'
			if `i'>1 | `k'>1 append using "mainfile"
			save "mainfile", replace
		}
	}

	forvalues k=5/5{
		forvalues i=1/4{
			import excel using Session`i'T`k'.xlsx, clear
			gen Treatment =`k'
			gen Session=`i'
			if `i'>1 | `k'>1 append using "mainfile"
			save "mainfile", replace
		}
	}


* Import SURVEY data from .xlsx files SurveySession#Treatment# and add treatment variable
	forvalues k=1/4{
		forvalues i=1/8{
			import excel using SurveySession`i'T`k'.xlsx, clear
			gen Treatment =`k'
			gen Session=`i'
			if `i'>1 | `k'>1 append using "survey"
			save "survey", replace
		}
	}

	forvalues k=5/5{
		forvalues i=1/4{
			import excel using SurveySession`i'T`k'.xlsx, clear
			gen Treatment =`k'
			gen Session=`i'
			if `i'>1 | `k'>1 append using "survey"
			save "survey", replace
		}
	}


*======================================================
* 		Cleaning SUBJECT Data
*======================================================
	clear all
	capture log close
	cd C:\temp\Data&Methods\DataCleaning

	use mainfile.dta, clear

*Date of session (in column A)
	gen ymd=substr(A,1,6)
	destring ymd, replace

*Time of session
	gen hm=substr(A,8,11)
	destring hm, replace

	sort ymd hm
	order Session
	drop A

*Sort by columns in spreadsheet and delete rows associated with variables not needed
	sort C
	drop if C != "subjects"
	gsort -D
	keep if B==2

* Add: Session header 
	gen temp_row =_n
	drop AX - CC
	drop if D=="Period" & temp_row!=1

* Keep variable about country voting choice (called "opt") and delete the others
	drop B

* Replace headers of columns C to AW with the strings in the first row
	capture 
	foreach var of varlist  C - AW {
	label variable `var' "`=`var'[1]'"
	rename `var' `=`var'[1]'
	}

	drop if temp_row==1

	destring Period - matchCountry, replace
	drop temp_row subjects

	drop Participate treatment profitB* profitY* profitZ* groupSizeNext minZblue maxZblue randM iniType numberPair idInGroup cAmount
	drop tickets beginTicket transfer* partner_tickets
	
	replace timeChoice=. if timeChoice==0
	
	order Treatment Session Subject
	save Union-data.dta, replace


*======================================================
* 		Cleaning  Data about Group Preferences
*======================================================
clear all
capture log close
cd C:\temp\Data&Methods\DataCleaning

use mainfile.dta, clear

	gen ymd=substr(A,1,6)
	destring ymd, replace

	gen hm=substr(A,8,11)
	destring hm, replace

	sort ymd hm

	sort C
	drop if C != "preferences"
	gsort -D

	gen temp_row =_n
	drop if D=="Period" & temp_row!=1

	drop A-C
	drop F-G
	drop I- CC	

	capture 
	foreach var of varlist  D - H  {
	label variable `var' "`=`var'[1]'"
	rename `var' `=`var'[1]'
	}

	drop if temp_row==1

	destring Period - pref, replace
	drop temp_row 

	drop if Period==1
	drop Period 

	rename pref opt

	order ymd hm Subject
	sort ymd hm Subject
	save preferences.dta, replace

	
*======================================================
* 		Merge PREFERENCES and SUBJECTS Data
*======================================================	
	clear all
	capture log close
	cd C:\temp\Data&Methods\DataCleaning

	use Union-data.dta, clear
	order  ymd hm Subject
	sort ymd hm Subject
	save Union-data2.dta, replace

	use preferences.dta
 	order  ymd hm Subject
	sort ymd hm Subject
	save preferences2, replace
 
	use Union-data2
	merge m:1 ymd hm Subject using preferences2
	
	egen temp=min(_merge)

	if temp==3 {
		drop _merge 
		}
	
	drop temp
	save Union-data.dta, replace


*======================================================
* 		Cleaning QUIZ Data
*======================================================
	clear all
	capture log close
	cd C:\temp\Data&Methods\DataCleaning

	use mainfile.dta, clear

	gen ymd=substr(A,1,6)
	destring ymd, replace

	gen hm=substr(A,8,11)
	destring hm, replace

	sort ymd hm
	order Session
	drop A

	keep if C== "subjects" & B==1
	gsort -D

	gen temp_row =_n
	drop if D=="Period" & temp_row!=1
	drop B

	gen ResponseTime1=X
	gen ResponseTime2=Y
	gen ResponseTime3=Z
	gen ResponseTime4=AA
	gen ResponseTime5=AB
	gen ResponseTime6=AC
	gen ResponseTime7=AD
	gen ResponseTime8=AE
	gen ResponseTime9=AF
	gen ResponseTime10=AG
	
	drop N -  CC


	capture 
	foreach var of varlist  C - M {
	label variable `var' "`=`var'[1]'"
	rename `var' `=`var'[1]'
	}

	drop if temp_row==1

	destring Period - RightAnswers, replace
	destring  ResponseTime1 - ResponseTime10, replace

	gen ResponseTime=0
	forvalues i=1/10{
		gen test=ResponseTime`i'
		replace ResponseTime=test+ResponseTime
		drop test
	}

	keep Session Treatment Subject ResponseTime RightAnswers
	order Treatment Session Subject
	save quiz.dta, replace

	
*======================================================
* 		Merge QUIZ and SUBJECTS Data
*======================================================	
	clear all
	capture log close
	cd C:\temp\Data&Methods\DataCleaning

	use Union-data.dta, clear
	sort Treatment Session Subject
	save Union-data2.dta, replace

	 use quiz.dta
	 sort Treatment Session Subject
	 save quiz2, replace
 
	use Union-data2
	merge m:1 Treatment Session Subject using quiz2
	
	egen temp=min(_merge)

	if temp==3 {
		drop _merge 
		}
	
	drop temp
	
	save Union-data.dta, replace


*======================================================
* 		Cleaning SURVEY Data
*======================================================
	clear all
	capture log close
	cd C:\temp\Data&Methods\DataCleaning

	use survey.dta, clear

	gsort  -A -H
	
	gen temp_row =_n
	drop if F=="risk" & temp_row!=1
	replace temp_row =_n
	drop if F=="Ambiguous1" & temp_row!=2
	
	drop F-J


	capture 
		foreach var of varlist  B - M {
		label variable `var' "`=`var'[2]'"
		rename `var' `=`var'[2]'
	}

	drop if temp_row<=2

	gen ymd=substr(A,1,6)
	destring ymd, replace

	gen hm=substr(A,8,11)
	destring hm, replace

	order Treatment Session
	drop A client
	sort ymd hm

	drop N-P

	destring Subject, replace
	
	encode sex, generate(sex2)
	drop sex
	gen sex=sex2
	drop sex2
	replace sex=sex-1
	label define sexl 0 "F" 1 "M" 
	label values sex sexl
	label variable  sex "sex (F=0)"

	order Treatment Session Subject sex 

	drop major comments instructions temp_row
	save survey-data.dta, replace


*======================================================
* 		Merge QUIZ+SUBJECTS Data with SURVEY Data
*======================================================	
	clear all
	capture log close
	cd C:\temp\Data&Methods\DataCleaning

	use Union-data.dta, clear
	sort Treatment Session Subject
	save Union-data2.dta, replace

	 use survey-data.dta
	 sort Treatment Session Subject
	 save survey2, replace
 
	use Union-data2
	merge m:1 Treatment Session Subject using survey2

	egen temp=min(_merge)

	if temp==3 {
		drop _merge
		drop temp
		}

	save Union-data.dta, replace


	capture confirm file mainfile.dta

	if _rc == 0 {
	    di "File exists, deleting ..."
	    erase mainfile.dta
	}


	else {
	    capture noisily error _rc
	    di "File keeps running though..."
	}
	
	erase preferences.dta 
	erase preferences2.dta
	erase quiz.dta
	erase quiz2.dta
	erase survey.dta
	erase survey2.dta
	erase survey-data.dta
	erase Union-data2.dta